# proteomics table
proteomics_tables <- list( thermo_fisher = readxl::read_excel("/Users/melinaklostermann/Documents/projects/PURA/02_R_new_pip/04-BioID-SILAC/03-Proteomics/data-without-outlier-rep-3/MOL13920_noRepl3_SwissHum_perc_precDet_int_ratio_TTEST_Peptide-high_proteins.xlsx", sheet = 2),
maxquant_matching = readxl::read_excel("/Users/melinaklostermann/Documents/projects/PURA/02_R_new_pip/04-BioID-SILAC/03-Proteomics/XX_reprocessed_data/MOL13920_wo-repl3_MQ1670_withMBR_proteinGroups_LFQ1_matching.xlsx"),
maxquant_no_matching = readxl::read_excel("/Users/melinaklostermann/Documents/projects/PURA/02_R_new_pip/04-BioID-SILAC/03-Proteomics/XX_reprocessed_data/MOL13920_wo-repl3_MQ1670_noMBR_proteinGroups_LFQ1.xlsx"))
p_cut_prot <- 0.05
map(proteomics_tables, ~nrow(.x))
## $thermo_fisher
## [1] 5606
##
## $maxquant_matching
## [1] 5040
##
## $maxquant_no_matching
## [1] 5023
proteomics_tables[[1]] <- proteomics_tables[[1]] %>% dplyr::rename(., gene_name = `Gene Symbol`)
proteomics_tables[[2]] <- proteomics_tables[[2]] %>% dplyr::rename(., gene_name = `Gene symbol`)
proteomics_tables[[3]] <- proteomics_tables[[3]] %>% dplyr::rename(., gene_name = `Gene symbol`)
Found proteins, that are not matched to any gene:
map(proteomics_tables, ~sum(is.na(.x$gene_name)))
## $thermo_fisher
## [1] 0
##
## $maxquant_matching
## [1] 91
##
## $maxquant_no_matching
## [1] 0
Genes, that are matched to multiple proteins:
map(proteomics_tables, ~sum(duplicated(.x$gene_name)))
## $thermo_fisher
## [1] 3
##
## $maxquant_matching
## [1] 93
##
## $maxquant_no_matching
## [1] 3
map(proteomics_tables, ~.x[duplicated(.x$gene_name),]$gene_name)
## $thermo_fisher
## [1] "SMCR7L; MIEF1" "CDKN2A" "TMPO"
##
## $maxquant_matching
## [1] "CDKN2A" "RAB34" "TMPO" NA NA NA NA NA
## [9] NA NA NA NA NA NA NA NA
## [17] NA NA NA NA NA NA NA NA
## [25] NA NA NA NA NA NA NA NA
## [33] NA NA NA NA NA NA NA NA
## [41] NA NA NA NA NA NA NA NA
## [49] NA NA NA NA NA NA NA NA
## [57] NA NA NA NA NA NA NA NA
## [65] NA NA NA NA NA NA NA NA
## [73] NA NA NA NA NA NA NA NA
## [81] NA NA NA NA NA NA NA NA
## [89] NA NA NA NA NA
##
## $maxquant_no_matching
## [1] "CDKN2A" "RAB34" "TMPO"
Removing both duplicated genes and unknown genes
proteomics_tables <- lapply(proteomics_tables, function(x){
x <- x[!is.na(x$gene_name),]
x <- x[!duplicated(x$gene_name),]
})
all = c(proteomics_tables[[1]]$gene_name, proteomics_tables[[2]]$gene_name, proteomics_tables[[3]]$gene_name) %>% unique() %>% na.omit()
overlap_mat <- cbind(
thermo_fisher = all %in% proteomics_tables[[1]]$gene_name,
maxquant_matching = all %in% proteomics_tables[[2]]$gene_name,
maxquant_no_matching = all %in% proteomics_tables[[3]]$gene_name)
fit <- eulerr::euler(overlap_mat)
plot(fit, quantities = TRUE)
head(all[!(all %in% proteomics_tables[[2]]$gene_name)])
## [1] "STARD7" "ASDURF" "BECN1" "BRK1" "CHCHD5" "CHST14"
# decoy matches and matches to contaminant should be removed --> already happened?
# any(proteomics_tables[[2]]$Reverse == "+", na.rm = T)
# any(proteomics_tables[[2]]$`Potential contaminant` == "+", na.rm = T)
#
# any(proteomics_tables[[3]]$Reverse == "+", na.rm = T)
# any(proteomics_tables[[3]]$`Potential contaminant` == "+", na.rm = T)
# LFQ table
LFQs <- list(thermo_fisher = proteomics_tables[[1]][,c(grepl(pattern = "(Normalized)", colnames(proteomics_tables[[1]])) |
grepl(pattern = "gene_name", colnames(proteomics_tables[[1]])))] %>%
as.data.frame(),
maxquant_matching = proteomics_tables[[2]][,c(grepl(pattern = "LFQ", colnames(proteomics_tables[[2]]))|
grepl(pattern = "gene_name", colnames(proteomics_tables[[2]])))] %>%
as.data.frame(),
maxquant_no_matching = proteomics_tables[[3]][,c(grepl(pattern = "LFQ", colnames(proteomics_tables[[3]]))|
grepl(pattern = "gene_name", colnames(proteomics_tables[[3]])))] %>%
as.data.frame())
LFQs <- lapply(LFQs, function(x){
colnames(x) <- c("gene_name", "kd1", "kd2", "kd3", "kd4", "wt1", "wt2", "wt3", "wt4")
return(x)})
LFQs_t <- map(LFQs, ~reshape2::melt(.x))
LFQs_t2 <- full_join(LFQs_t[[1]], LFQs_t[[2]], by = c("gene_name", "variable"), suffix = c(".tf", ".mq_ma"))
LFQs_t2 <- full_join(LFQs_t2, LFQs_t[[3]], by = c("gene_name", "variable"), suffix = c("", ".mq_noma"))
LFQs_t3 <- reshape2::melt(LFQs_t2)
colnames(LFQs_t3) <- c("gene_name", "sample", "processing", "abundance")
ggplot(LFQs_t2, aes(x=log10(value.tf), y= log10(value.mq_ma)))+
xlim(5,11)+
ylim(5,11)+
ggrastr::rasterise(geom_point(size = 0.1), dpi = 400)+
ggrastr::rasterise(ggpointdensity::geom_pointdensity(size = 0.1), dpi = 400)+
xlab("thermofisher abundances")+
ylab("maxquant - matching abundances")+
geom_point(data=LFQs_t2[(LFQs_t2$gene_name == "PURA" & grepl(LFQs_t2$variable, pattern = "kd")), ], aes(x=log10(value.tf), y= log10(value.mq_ma)), color = "orange", size = 0.5)+
geom_point(data=LFQs_t2[(LFQs_t2$gene_name == "PURA" & grepl(LFQs_t2$variable, pattern = "wt")), ], aes(x=log10(value.tf), y= log10(value.mq_ma)), color = "red", size = 0.5)+
theme_paper()+
theme(legend.key.size = unit(0.15, 'cm'))
#ggsave(paste0(outpath, "Scatter_tf_MQ.pdf"), width = 7, height = 6, units = "cm")
ggplot(LFQs_t2, aes(x=log10(value), y= log10(value.mq_ma)))+
ggrastr::rasterise(geom_point(size = 0.1), dpi = 400)+
ggrastr::rasterise(ggpointdensity::geom_pointdensity(size = 0.1), dpi = 400)+
xlab("maxquant - no matching abundances")+
ylab("maxquant - matching abundances")+
geom_point(data=LFQs_t2[(LFQs_t2$gene_name == "PURA" & grepl(LFQs_t2$variable, pattern = "wt")), ], aes(x=log10(value), y= log10(value.mq_ma)), color = "red", size = 0.5)+
geom_point(data=LFQs_t2[(LFQs_t2$gene_name == "PURA" & grepl(LFQs_t2$variable, pattern = "kd")), ], aes(x=log10(value), y= log10(value.mq_ma)), color = "orange", size = 0.5)+
theme_paper()+
theme(legend.key.size = unit(0.15, 'cm'))
#ggsave(paste0(outpath, "Scatter_MQ_match_no_match.pdf"), width = 7, height = 6, units = "cm")
–> We exclude proteins with 2 or 3 missing values in either of the conditions
# turn 0 into NA
LFQs[[1]][LFQs[[1]]==0] = NA
LFQs[[2]][LFQs[[2]]==0] = NA
LFQs[[3]][LFQs[[3]]==0] = NA
# sum fall outs
LFQs <- lapply(LFQs, function(x){
x$na_kd <- apply(x,1, function(y) sum(is.na(y[2:5])))
x$na_wt <- apply(x,1, function(y) sum(is.na(y[6:9])))
return(x)
})
LFQ_filt <- LFQs %>% map(., ~dplyr::filter(.x, (na_wt %in% c(0,1)) & (na_kd %in% c(0,1))))
LFQ_4_zeros <- LFQs %>% map(., ~dplyr::filter(.x, (((na_wt == 4) & (na_kd %in% c(0,1)))| ((na_kd == 4)& (na_wt %in% c(0,1))))))
#saveRDS(LFQ_filt, paste0(outpath, "LFQ_tables.rds"))
Number of Proteins after zero filtering: (max one missing value per condition)
map(LFQ_filt, ~ nrow(.x))
## $thermo_fisher
## [1] 5373
##
## $maxquant_matching
## [1] 4453
##
## $maxquant_no_matching
## [1] 3537
Proteins with fall-out only in one condition: (4 zeros in one condition and max 1 zero in other condition)
map(LFQ_4_zeros, ~ .x$gene_name)
## $thermo_fisher
## [1] "STARD7" "EEF2K; LOC101930123" "GMFB"
## [4] "LMTK2" "MIF4GD" "RAD51AP2"
## [7] "SDF2" "CYB561" "DHRS11"
## [10] "FYCO1" "KLC4" "KPRP"
## [13] "OXNAD1" "SLC26A6" "TMEM87B"
##
## $maxquant_matching
## [1] "ADAM17" "ARHGAP18" "ARHGAP29" "ATP11A" "BNIP1" "C1orf122"
## [7] "CAPN15" "CDT1" "CGNL1" "CNOT8" "CYFIP2" "DHX33"
## [13] "DTL" "DUSP27" "ENG" "FAM71B" "GDAP2" "GOLGA7"
## [19] "GPN3" "H2AC20" "HAUS2" "HDAC6" "KDM5B" "KIF20A"
## [25] "MDFIC" "MGME1" "MRPL54" "MTERF3" "NCOA3" "NDUFAF6"
## [31] "NOP10" "NUDT3" "OGA" "PCDH1" "PIK3C2A" "PKP1"
## [37] "PM20D2" "PPIL3" "PPTC7" "PRMT6" "PRORSD1P" "PTGR2"
## [43] "SELENOK" "SERF2" "SLC2A4" "TRIM16" "UBE2E3" "UBR7"
## [49] "YPEL5"
##
## $maxquant_no_matching
## [1] "COX7C" "PURA" "AAK1" "ACAD11" "ADI1" "APLP2" "ATF1"
## [8] "ATF7" "B4GALT7" "C1QTNF6" "CAP2" "CASP2" "CBFB" "CCNH"
## [15] "CD47" "CDT1" "CMC1" "CNOT11" "COMMD10" "CUL5" "CYFIP2"
## [22] "CYP2S1" "DCAF7" "DHX33" "DUSP27" "EFEMP1" "ELOF1" "EPM2A"
## [29] "EPN2" "FAM71B" "FGFR1OP" "GINS1" "GLMP" "GMEB2" "HDAC6"
## [36] "JPH1" "KANK2" "LAMA5" "LAMC1" "LMAN2L" "LMNB2" "LRP8"
## [43] "LZTFL1" "MASTL" "MINDY4" "MSH5" "MSTO1" "NAGLU" "NEK5"
## [50] "NFYA" "NMI" "NTHL1" "NUDT3" "OGA" "OSBPL11" "OSGEP"
## [57] "P3H4" "PEF1" "PEX3" "PI4KA" "PIK3C2A" "PIK3C2B" "PLXNA2"
## [64] "PPP6R3" "PSEN2" "RAP2B" "RPIA" "RWDD1" "S100A14" "SCAP"
## [71] "SFT2D1" "SH3YL1" "SIRT5" "SLC44A1" "SLC6A8" "SLC7A6" "SMAP1"
## [78] "SMIM7" "SPAG5" "SPRYD7" "SRGAP2" "ST7" "TBC1D5" "TMEM41A"
## [85] "TMEM63B" "TRIP11" "UQCC2" "VPS28" "VPS51" "VPS53" "WDR45B"
## [92] "YRDC" "ZBTB11" "ZNF830" "ZNHIT2" "ZPR1"
map(LFQ_4_zeros, ~ nrow(.x))
## $thermo_fisher
## [1] 15
##
## $maxquant_matching
## [1] 49
##
## $maxquant_no_matching
## [1] 96
There are multiple options for differential analysis
Use boxplot to check if the samples have medians centered. if not, do median centering.
–> yes
###################
# function DEqMS
###################
f_DEqMS <- function(protein_matrix, pep_count_table){
# model
##################
condition = as.factor(c(rep("kd",4), rep("wt",4)))
design = model.matrix(~0+condition)
contrasts <- c("conditionkd-conditionwt")
contrast <- makeContrasts(contrasts=contrasts, levels = design)
# fit model to matrix
########################
fit1 = lmFit(protein_matrix, design = design)
fit2 = contrasts.fit(fit1, contrasts = contrast)
fit3 <- eBayes(fit2)
# correct bias of variance estimate
####################################
fit3$count = pep_count_table[rownames(fit3$coefficients),"count"]
fit4 <- spectraCounteBayes(fit3)
VarianceBoxplot(fit4, main = "Fit of variance estimate",
xlab="peptide count + 1")
VarianceScatterplot(fit4, main = "Fit of variance estimate",
xlab="peptide count + 1")
# adding gene info to results
DEqMS.results = outputResult(fit4, coef_col = 1)
DEqMS.results$gene_name <- rownames(DEqMS.results)
return(DEqMS.results)
}
DEqMS is based on limma, but adjustes the p-values in the end for the variance given for the specific number of unique peptides found
#####################
# peptide count tables
########################
# Maxquant: we use minimum peptide count among six samples
# count unique+razor peptides used for quantification
pep.count.table = list( thermo_fisher = data.frame(count = proteomics_tables[[1]]$`# Unique Peptides`,
row.names = proteomics_tables[[1]]$gene_name),
maxquant_mat = data.frame(count = rowMins(as.matrix(proteomics_tables[[2]][,20:27])),
row.names = proteomics_tables[[2]]$gene_name),
maxquant_nomat = data.frame(count = rowMins(as.matrix(proteomics_tables[[3]][,20:27])),
row.names = proteomics_tables[[3]]$gene_name))
# Minimum peptide count of some proteins can be 0
# add pseudocount 1 to all proteins
pep.count.table = map(pep.count.table, ~mutate(.x, count = count+1))
# run
rownames(protein.matrix[[1]]) = LFQ_filt[[1]]$gene_name
rownames(protein.matrix[[2]]) = LFQ_filt[[2]]$gene_name
rownames(protein.matrix[[3]]) = LFQ_filt[[3]]$gene_name
deqms <- map2(protein.matrix, pep.count.table, ~ f_DEqMS(protein_matrix = .x, pep_count_table = .y))
titles <- as.list(names(deqms))
map2(deqms, titles, ~ggplot(.x, aes(x = logFC, y = (-log10(sca.adj.pval))))+
geom_point(color ="grey", shape =1)+
geom_point(data = .x[(.x$sca.adj.pval< 0.05) & (abs(.x$logFC) > log(0.5)), ],
aes(x= logFC, y = (-log10(sca.adj.pval))))+
geom_point(data = .x[.x$gene_name %in% c("CTNNA1", "SQSTM1", "LSM14A", "RBFOX2", "DDX6", "GAPDH", "TUBB", "PURA", "PURB"),],
aes(x= logFC, y = -log10(sca.adj.pval)), color = "orange")+
ggrepel::geom_label_repel( data = .x[.x$gene_name %in% c("CTNNA1", "SQSTM1", "LSM14A", "RBFOX2", "DDX6", "GAPDH", "TUBB","PURA", "PURB"),],
aes( x = logFC, y = (-log10(sca.adj.pval)), label = gene_name), max.overlaps = 10)+
xlab("Protein fold change (kd/wt) [log2]")+
ylab("adj. p-value (protein fold change)")+
theme_paper()+
theme(legend.position = "none", aspect.ratio = 1/1)+
ggtitle(paste(.y, "\n DEqMS differential analysis")))
## $thermo_fisher
##
## $maxquant_matching
##
## $maxquant_no_matching
–> striped
ggplot(proteomics_tables[[1]], aes(x = log2(`Abundance Ratio: (PurA-KD) / (WT)`), y = `Exp. q-value: Combined`))+
geom_point(color ="grey", shape =1)+
geom_point(data = proteomics_tables[[1]][(proteomics_tables[[1]]$`Exp. q-value: Combined`< 0.05) & (abs(log2(proteomics_tables[[1]]$`Abundance Ratio: (PurA-KD) / (WT)`)) > log(0.5)), ],
aes(x=log2(`Abundance Ratio: (PurA-KD) / (WT)`), y = (-log10(`Exp. q-value: Combined`))))+
geom_point(data = proteomics_tables[[1]][proteomics_tables[[1]]$gene_name %in% c("CTNNA1", "SQSTM1", "LSM14A", "RBFOX2", "DDX6", "GAPDH", "TUBB", "PURA", "PURB"),],
aes(x=log2(`Abundance Ratio: (PurA-KD) / (WT)`), y = -log10(`Exp. q-value: Combined`)), color = "orange")+
ggrepel::geom_label_repel( data = proteomics_tables[[1]][proteomics_tables[[1]]$gene_name %in% c("CTNNA1", "SQSTM1", "LSM14A", "RBFOX2", "DDX6", "GAPDH", "TUBB","PURA", "PURB"),],
aes( x =log2(`Abundance Ratio: (PurA-KD) / (WT)`), y = (-log10(`Exp. q-value: Combined`)), label = gene_name), max.overlaps = 10)+
xlab("Protein fold change (kd/wt) [log2]")+
ylab("adj. p-value (protein fold change)")+
theme_paper()+
theme(legend.position = "none", aspect.ratio = 1/1)+
ggtitle(paste(" Detection & quantification from pipeline"))
###############
# countplot PURA
#################
LFQ_filt_PURA <- map(LFQ_filt, ~.x[.x$gene_name=="PURA",])
LFQ_filt_PURA <- map(LFQ_filt_PURA, ~reshape2::melt(.x))
LFQ_filt_PURA <- map(LFQ_filt_PURA, ~.x[1:8,]) %>% map(., ~mutate(.x, condition = substr(variable,1,2)))
LFQ_filt_PURA_no_ma <- LFQ_4_zeros[[3]][LFQ_4_zeros[[3]]$gene_name=="PURA",] %>% reshape2::melt()
LFQ_filt_PURA_no_ma <- LFQ_filt_PURA_no_ma[1:8,] %>% mutate(., condition = substr(variable,1,2))
LFQ_filt_PURA[[1]]$processing <- "thermo fisher"
LFQ_filt_PURA[[2]]$processing <- "maxquant + matching"
LFQ_filt_PURA_no_ma$processing <- "maxquant, no matching"
gg_df <- rbind(LFQ_filt_PURA[[1]], LFQ_filt_PURA[[2]], LFQ_filt_PURA_no_ma)
ggplot(gg_df, aes(x = variable, y = log(value), color = processing))+
geom_point()+
ggtitle("Intensities of PURA from thermo fisher or maxquant processing")+
theme_paper()+
theme(panel.grid.major = element_line(color = "lightgrey", size = 0.25),
legend.position = "None")
ggplot(gg_df, aes(x = variable, y = log(value), color = processing))+
geom_point()+
theme_paper()+
theme(panel.grid.major = element_line(color = "lightgrey", size = 0.25),
legend.position = "None")
#ggsave(filename = paste0(outpath, "protein_counts.pdf"), width = 6, height = 6, units = "cm")
###############
# countplot PURB
#################
LFQ_filt_PURB <- map(LFQ_filt, ~.x[.x$gene_name=="PURB",])
LFQ_filt_PURB <- map(LFQ_filt_PURB, ~reshape2::melt(.x))
LFQ_filt_PURB <- map(LFQ_filt_PURB, ~.x[1:8,]) %>% map(., ~mutate(.x, condition = substr(variable,1,2)))
# LFQ_filt_PURB_no_ma <- LFQ_4_zeros[[3]][LFQ_4_zeros[[3]]$gene_name=="PURB",] %>% reshape2::melt()
# LFQ_filt_PURB_no_ma <- LFQ_filt_PURB_no_ma[1:8,] %>% mutate(., condition = substr(variable,1,2))
LFQ_filt_PURB[[1]]$processing <- "thermo fisher"
LFQ_filt_PURB[[2]]$processing <- "maxquant + matching"
LFQ_filt_PURB[[3]]$processing <- "maxquant, no matching"
gg_df <- rbind(LFQ_filt_PURB[[1]], LFQ_filt_PURB[[2]], LFQ_filt_PURB[[3]])
ggplot(gg_df, aes(x = variable, y = log(value), color = processing))+
geom_point()+
ggtitle("Intensities of PURB from thermo fisher or maxquant processing")
#LFQs[[3]][LFQs[[3]]$gene_name=="PURB",]
–> different quantification of proteins
peptides_tf <- readxl::read_excel("/Users/melinaklostermann/Documents/projects/PURA/02_R_new_pip/04-BioID-SILAC/03-Proteomics/peptide_lists/MOL13920/MOL13920_noRepl3_SwissHum_perc_precDet_int_ratio_TTEST_Peptide-high_peptides.xlsx")
peptides_mq <- readxl::read_excel("/Users/melinaklostermann/Documents/projects/PURA/02_R_new_pip/04-BioID-SILAC/03-Proteomics/peptide_lists/MOL13920/MOL13920_wo-repl3_MQ1670_withMBR_peptides_LFQ1.xlsx")
PURA_protein_id <- "Q00577"
PURB_protein_id <- "Q96QR8"
# peptides maxquant
peptides_mq <- peptides_mq %>%
rowwise() %>%
mutate(., protein_id = unlist(strsplit(`Leading razor protein`, split = "|", fixed = T ))[2],
protein_name = unlist(strsplit(`Leading razor protein`, split = "|", fixed = T))[3],
protein_name = unlist(strsplit(protein_name, split = "_", fixed = T))[1]) %>% as.data.frame(.)
peptides_PURA_mq <- peptides_mq %>% filter(protein_id == PURA_protein_id )
peptides_PURB_mq <- peptides_mq %>% filter(protein_id == PURB_protein_id )
# peptides thermo fisher
peptides_PURA_tf <- peptides_tf %>% filter(`Master Protein Accessions` == PURA_protein_id )
peptides_PURB_tf <- peptides_tf %>% filter(`Master Protein Accessions` == PURB_protein_id )
peptides_PURA_mq[,c("Sequence", "PEP")]
## Sequence PEP
## 1 FFFDVGSNK 1.6019e-02
## 2 GPGLGSTQGQTIALPAQGLIEFR 1.8044e-05
peptides_PURA_tf[,c("Annotated Sequence", "Abundance Ratio: (PurA-KD) / (WT)", "Qvality PEP", "Qvality q-value" , "Percolator q-Value (by Search Engine): Sequest HT")]
## # A tibble: 3 × 5
## `Annotated Sequence` `Abundance Rat…` `Qvality PEP` `Qvality q-val…`
## <chr> <dbl> <dbl> <dbl>
## 1 [R].GPGLGSTQGQTIALPAQGLIEFR.[… 0.554 0.00000000464 0.0000267
## 2 [R].FFFDVGSNK.[Y] 0.552 0.000998 0.000119
## 3 [R].NSITVPYK.[V] 1.12 0.0523 0.00446
## # … with 1 more variable:
## # `Percolator q-Value (by Search Engine): Sequest HT` <dbl>
peptides_PURB_mq[,c("Sequence", "PEP")]
## Sequence PEP
## 1 GGGEQETQELASK 0.00011241
## 2 NAITVPFK 0.03866400
peptides_PURB_tf[,c("Annotated Sequence", "Abundance Ratio: (PurA-KD) / (WT)", "Qvality PEP", "Qvality q-value" , "Percolator q-Value (by Search Engine): Sequest HT")]
## # A tibble: 2 × 5
## `Annotated Sequence` `Abundance Ratio: (PurA…` `Qvality PEP` `Qvality q-val…`
## <chr> <dbl> <dbl> <dbl>
## 1 [R].GGGEQETQELASK.[R] 0.699 0.000171 0.0000504
## 2 [R].NAITVPFK.[A] 0.694 0.0389 0.00339
## # … with 1 more variable:
## # `Percolator q-Value (by Search Engine): Sequest HT` <dbl>
# PURA
gg_pura_mq <- peptides_PURA_mq[,(grepl(pattern="LFQ", colnames(peptides_PURA_mq))|
grepl(pattern="Sequence", colnames(peptides_PURA_mq)))] %>% reshape2::melt(.)
gg_pura_tf <- peptides_PURA_tf[,(grepl(pattern="Normalized", colnames(peptides_PURA_tf))|
grepl(pattern="Annotated Sequence", colnames(peptides_PURA_tf)))] %>% reshape2::melt(.)
gg_pura_tf <- gg_pura_tf %>% rowwise(.) %>% mutate(`Annotated Sequence` = unlist(strsplit(`Annotated Sequence`, split = ".", fixed =T))[2]) %>%
dplyr::rename(Sequence = `Annotated Sequence`)
gg_pura_mq$condition = c(rep("kd1", 2), rep("kd2", 2), rep("kd3", 2), rep("kd4", 2),
rep("wt1", 2), rep("wt2", 2), rep("wt3", 2), rep("wt4", 2))
gg_pura_tf$condition = c(rep("kd1", 3), rep("kd2", 3), rep("kd3", 3), rep("kd4", 3),
rep("wt1", 3), rep("wt2", 3), rep("wt3", 3), rep("wt4", 3))
gg_pura_mq$processing = "maxquant"
gg_pura_tf$processing = "thermo fisher"
gg_pura <- rbind(gg_pura_mq, gg_pura_tf)
ggplot(gg_pura, aes(x = condition, y = log(value), color = processing ))+
geom_point()+
facet_wrap(~Sequence)+
theme_paper()
# PURB
gg_purb_mq <- peptides_PURB_mq[,(grepl(pattern="LFQ", colnames(peptides_PURB_mq))|
grepl(pattern="Sequence", colnames(peptides_PURB_mq)))] %>% reshape2::melt(.)
gg_purb_tf <- peptides_PURB_tf[,(grepl(pattern="Normalized", colnames(peptides_PURB_tf))|
grepl(pattern="Annotated Sequence", colnames(peptides_PURB_tf)))] %>% reshape2::melt(.)
gg_purb_tf <- gg_purb_tf %>% rowwise(.) %>% mutate(`Annotated Sequence` = unlist(strsplit(`Annotated Sequence`, split = ".", fixed =T))[2]) %>%
dplyr::rename(Sequence = `Annotated Sequence`)
gg_purb_mq$condition = c(rep("kd1", 2), rep("kd2", 2), rep("kd3", 2), rep("kd4", 2),
rep("wt1", 2), rep("wt2", 2), rep("wt3", 2), rep("wt4", 2))
gg_purb_tf$condition = c(rep("kd1", 2), rep("kd2", 2), rep("kd3", 2), rep("kd4", 2),
rep("wt1", 2), rep("wt2", 2), rep("wt3", 2), rep("wt4", 2))
gg_purb_mq$processing = "maxquant"
gg_purb_tf$processing = "thermo fisher"
gg_purb <- rbind(gg_purb_mq, gg_purb_tf)
ggplot(gg_purb, aes(x = condition, y = log(value), color = processing ))+
geom_point()+
facet_wrap(~Sequence)
–> third peptide only detected in thermofisher preprocessing
–> thrid peptide is only found once in thermofisher and then matched into all other samples, this did not happen for maxquant